From: mwilli2@equilibrium.research.intel-research.net Date: Tue, 4 May 2004 14:21:44 +0000 (+0000) Subject: bitkeeper revision 1.879.3.1 (4097a6f8Q5eWNArSydr2Qh2tZnFF4w) X-Git-Tag: archive/raspbian/4.8.0-1+rpi1~1^2~18234^2 X-Git-Url: https://dgit.raspbian.org/%22http:/www.example.com/cgi/%22https:/%22bookmarks://%22/%22http:/www.example.com/cgi/%22https:/%22bookmarks:/%22?a=commitdiff_plain;h=3f48ce6ee181b1f97b223357909542cf8a7e0180;p=xen.git bitkeeper revision 1.879.3.1 (4097a6f8Q5eWNArSydr2Qh2tZnFF4w) Support for selectively granting IO resource privileges. Domains that access physical devices now don't need to be fully privileged. --- diff --git a/tools/examples/xc_dom_create.py b/tools/examples/xc_dom_create.py index b898443ecf..e803737ef0 100755 --- a/tools/examples/xc_dom_create.py +++ b/tools/examples/xc_dom_create.py @@ -239,12 +239,8 @@ def make_domain(): xc.domain_destroy ( dom=id ) sys.exit() - # will the domain have IO privileges? - if pci_device_list != []: io_priv = True - else: io_priv = False - if restore: - ret = eval('xc.%s_restore ( dom=id, state_file=state_file, progress=1, io_priv=%d )' % (builder_fn, io_priv)) + ret = eval('xc.%s_restore ( dom=id, state_file=state_file, progress=1 )' % builder_fn ) if ret < 0: print "Error restoring domain" print "Return code = " + str(ret) @@ -252,7 +248,7 @@ def make_domain(): sys.exit() else: - ret = eval('xc.%s_build ( dom=id, image=image, ramdisk=ramdisk, cmdline=cmdline, control_evtchn=xend_response["remote_port"], io_priv=%d )' % (builder_fn, io_priv) ) + ret = eval('xc.%s_build ( dom=id, image=image, ramdisk=ramdisk, cmdline=cmdline, control_evtchn=xend_response["remote_port"] )' % builder_fn ) if ret < 0: print "Error building Linux guest OS: " print "Return code = " + str(ret) diff --git a/tools/xc/lib/xc.h b/tools/xc/lib/xc.h index eb1b07da91..9a0fab2257 100644 --- a/tools/xc/lib/xc.h +++ b/tools/xc/lib/xc.h @@ -74,15 +74,13 @@ int xc_linux_build(int xc_handle, const char *image_name, const char *ramdisk_name, const char *cmdline, - unsigned int control_evtchn, - int io_priv); + unsigned int control_evtchn); int xc_netbsd_build(int xc_handle, u64 domid, const char *image_name, const char *cmdline, - unsigned int control_evtchn, - int io_priv); + unsigned int control_evtchn); int xc_bvtsched_global_set(int xc_handle, unsigned long ctx_allow); diff --git a/tools/xc/lib/xc_linux_build.c b/tools/xc/lib/xc_linux_build.c index 27bc6c6668..f1bd182827 100644 --- a/tools/xc/lib/xc_linux_build.c +++ b/tools/xc/lib/xc_linux_build.c @@ -73,8 +73,7 @@ static int setup_guestos(int xc_handle, dom0_builddomain_t *builddomain, const char *cmdline, unsigned long shared_info_frame, - unsigned int control_evtchn, - int io_priv) + unsigned int control_evtchn) { l1_pgentry_t *vl1tab=NULL, *vl1e=NULL; l2_pgentry_t *vl2tab=NULL, *vl2e=NULL; @@ -270,7 +269,7 @@ static int setup_guestos(int xc_handle, memset(start_info, 0, sizeof(*start_info)); start_info->nr_pages = nr_pages; start_info->shared_info = shared_info_frame << PAGE_SHIFT; - start_info->flags = io_priv ? SIF_PRIVILEGED : 0; + start_info->flags = 0; start_info->pt_base = vpt_start; start_info->nr_pt_frames = nr_pt_pages; start_info->mfn_list = vphysmap_start; @@ -383,8 +382,7 @@ int xc_linux_build(int xc_handle, const char *image_name, const char *ramdisk_name, const char *cmdline, - unsigned int control_evtchn, - int io_priv) + unsigned int control_evtchn) { dom0_op_t launch_op, op; int initrd_fd = -1; @@ -442,7 +440,7 @@ int xc_linux_build(int xc_handle, &vstartinfo_start, &vkern_entry, &launch_op.u.builddomain, cmdline, op.u.getdomaininfo.shared_info_frame, - control_evtchn, io_priv) < 0 ) + control_evtchn) < 0 ) { ERROR("Error constructing guest OS"); goto error_out; diff --git a/tools/xc/lib/xc_netbsd_build.c b/tools/xc/lib/xc_netbsd_build.c index cac444bd80..8793a512f2 100644 --- a/tools/xc/lib/xc_netbsd_build.c +++ b/tools/xc/lib/xc_netbsd_build.c @@ -62,8 +62,7 @@ static int setup_guestos(int xc_handle, dom0_builddomain_t *builddomain, const char *cmdline, unsigned long shared_info_frame, - unsigned int control_evtchn, - int io_priv) + unsigned int control_evtchn) { l1_pgentry_t *vl1tab=NULL, *vl1e=NULL; l2_pgentry_t *vl2tab=NULL, *vl2e=NULL; @@ -177,7 +176,7 @@ static int setup_guestos(int xc_handle, start_info->mod_len = symtab_len; start_info->nr_pages = tot_pages; start_info->shared_info = shared_info_frame << PAGE_SHIFT; - start_info->flags = io_priv ? SIF_PRIVILEGED : 0; + start_info->flags = 0; start_info->domain_controller_evtchn = control_evtchn; strncpy(start_info->cmd_line, cmdline, MAX_CMDLINE); start_info->cmd_line[MAX_CMDLINE-1] = '\0'; @@ -214,8 +213,7 @@ int xc_netbsd_build(int xc_handle, u64 domid, const char *image_name, const char *cmdline, - unsigned int control_evtchn, - int io_priv) + unsigned int control_evtchn) { dom0_op_t launch_op, op; unsigned long load_addr; @@ -265,7 +263,7 @@ int xc_netbsd_build(int xc_handle, &virt_startinfo_addr, &load_addr, &launch_op.u.builddomain, cmdline, op.u.getdomaininfo.shared_info_frame, - control_evtchn, io_priv) < 0 ) + control_evtchn) < 0 ) { ERROR("Error constructing guest OS"); goto error_out; diff --git a/tools/xc/py/Xc.c b/tools/xc/py/Xc.c index 322a20b411..6453281a61 100644 --- a/tools/xc/py/Xc.c +++ b/tools/xc/py/Xc.c @@ -228,19 +228,18 @@ static PyObject *pyxc_linux_build(PyObject *self, u64 dom; char *image, *ramdisk = NULL, *cmdline = ""; - int control_evtchn, io_priv = 0; + int control_evtchn; static char *kwd_list[] = { "dom", "control_evtchn", - "image", "ramdisk", "cmdline", "io_priv", - NULL }; + "image", "ramdisk", "cmdline", NULL }; - if ( !PyArg_ParseTupleAndKeywords(args, kwds, "Lis|ssi", kwd_list, + if ( !PyArg_ParseTupleAndKeywords(args, kwds, "Lis|ss", kwd_list, &dom, &control_evtchn, - &image, &ramdisk, &cmdline, &io_priv) ) + &image, &ramdisk, &cmdline) ) return NULL; if ( xc_linux_build(xc->xc_handle, dom, image, - ramdisk, cmdline, control_evtchn, io_priv) != 0 ) + ramdisk, cmdline, control_evtchn) != 0 ) return PyErr_SetFromErrno(xc_error); Py_INCREF(zero); @@ -255,19 +254,18 @@ static PyObject *pyxc_netbsd_build(PyObject *self, u64 dom; char *image, *ramdisk = NULL, *cmdline = ""; - int control_evtchn, io_priv = 0; + int control_evtchn; static char *kwd_list[] = { "dom", "control_evtchn", - "image", "ramdisk", "cmdline", "io_priv", - NULL }; + "image", "ramdisk", "cmdline", NULL }; if ( !PyArg_ParseTupleAndKeywords(args, kwds, "Lis|ssi", kwd_list, &dom, &control_evtchn, - &image, &ramdisk, &cmdline, &io_priv) ) + &image, &ramdisk, &cmdline) ) return NULL; if ( xc_netbsd_build(xc->xc_handle, dom, image, - cmdline, control_evtchn, io_priv) != 0 ) + cmdline, control_evtchn) != 0 ) return PyErr_SetFromErrno(xc_error); Py_INCREF(zero); @@ -1162,8 +1160,7 @@ static PyMethodDef pyxc_methods[] = { " dom [long]: Identifier of domain to build into.\n" " image [str]: Name of kernel image file. May be gzipped.\n" " ramdisk [str, n/a]: Name of ramdisk file, if any.\n" - " cmdline [str, n/a]: Kernel parameters, if any.\n" - " io_priv [boolean]: Does the domain have IO privileges?\n\n" + " cmdline [str, n/a]: Kernel parameters, if any.\n\n" "Returns: [int] 0 on success; -1 on error.\n" }, { "netbsd_build", @@ -1172,15 +1169,14 @@ static PyMethodDef pyxc_methods[] = { "Build a new NetBSD guest OS.\n" " dom [long]: Identifier of domain to build into.\n" " image [str]: Name of kernel image file. May be gzipped.\n" - " cmdline [str, n/a]: Kernel parameters, if any.\n" - " io_priv [boolean]: Does the domain have IO privileges?\n\n" + " cmdline [str, n/a]: Kernel parameters, if any.\n\n" "Returns: [int] 0 on success; -1 on error.\n" }, { "bvtsched_global_set", (PyCFunction)pyxc_bvtsched_global_set, METH_VARARGS | METH_KEYWORDS, "\n" "Set global tuning parameters for Borrowed Virtual Time scheduler.\n" - " ctx_allow [int]: Minimal guaranteed quantum (I think!).\n\n" + " ctx_allow [int]: Minimal guaranteed quantum.\n\n" "Returns: [int] 0 on success; -1 on error.\n" }, { "bvtsched_global_get", @@ -1195,10 +1191,10 @@ static PyMethodDef pyxc_methods[] = { METH_VARARGS | METH_KEYWORDS, "\n" "Set per-domain tuning parameters for Borrowed Virtual Time scheduler.\n" " dom [long]: Identifier of domain to be tuned.\n" - " mcuadv [int]: Internal BVT parameter.\n" - " warp [int]: Internal BVT parameter.\n" - " warpl [int]: Internal BVT parameter.\n" - " warpu [int]: Internal BVT parameter.\n\n" + " mcuadv [int]: Proportional to the inverse of the domain's weight.\n" + " warp [int]: How far to warp domain's EVT on unblock.\n" + " warpl [int]: How long the domain can run warped.\n" + " warpu [int]: How long before the domain can warp again.\n\n" "Returns: [int] 0 on success; -1 on error.\n" }, { "bvtsched_domain_get", diff --git a/xen/arch/i386/irq.c b/xen/arch/i386/irq.c index 4c50bb592d..d3eaf6af12 100644 --- a/xen/arch/i386/irq.c +++ b/xen/arch/i386/irq.c @@ -1001,7 +1001,7 @@ int pirq_guest_bind(struct task_struct *p, int irq, int will_share) irq_guest_action_t *action; int rc = 0; - if ( !IS_PRIV(p) ) + if ( !IS_CAPABLE_PHYSDEV(p) ) return -EPERM; spin_lock_irqsave(&desc->lock, flags); diff --git a/xen/arch/i386/process.c b/xen/arch/i386/process.c index ea5c51d176..29c4fde8cb 100644 --- a/xen/arch/i386/process.c +++ b/xen/arch/i386/process.c @@ -227,12 +227,14 @@ void new_thread(struct task_struct *p, : /* no output */ \ :"r" (thread->debugreg[register])) + void switch_to(struct task_struct *prev_p, struct task_struct *next_p) { struct thread_struct *next = &next_p->thread; struct tss_struct *tss = init_tss + smp_processor_id(); execution_context_t *stack_ec = get_execution_context(); - + int i; + __cli(); /* Switch guest general-register state. */ @@ -280,6 +282,58 @@ void switch_to(struct task_struct *prev_p, struct task_struct *next_p) } } + if ( ( prev_p->io_bitmap != NULL ) || ( next_p->io_bitmap != NULL ) ) { + if ( next_p->io_bitmap != NULL ) { + /* Copy in the appropriate parts of the IO bitmap. We use the + * selector to copy only the interesting parts of the bitmap. */ + + u64 old_sel = ~0ULL; /* IO bitmap selector for previous task. */ + + if ( prev_p->io_bitmap != NULL) + { + old_sel = prev_p->io_bitmap_sel; + + /* Replace any areas of the IO bitmap that had bits cleared. */ + for ( i = 0; i < sizeof(prev_p->io_bitmap_sel) * 8; i++ ) + if ( !test_bit(i, &prev_p->io_bitmap_sel) ) + memcpy(&tss->io_bitmap[i * IOBMP_SELBIT_LWORDS], + &next_p->io_bitmap[i * IOBMP_SELBIT_LWORDS], + IOBMP_SELBIT_LWORDS * sizeof(unsigned long)); + } + + /* Copy in any regions of the new task's bitmap that have bits + * clear and we haven't already dealt with. */ + for ( i = 0; i < sizeof(prev_p->io_bitmap_sel) * 8; i++ ) + { + if ( test_bit(i, &old_sel) + && !test_bit(i, &next_p->io_bitmap_sel) ) + memcpy(&tss->io_bitmap[i * IOBMP_SELBIT_LWORDS], + &next_p->io_bitmap[i * IOBMP_SELBIT_LWORDS], + IOBMP_SELBIT_LWORDS * sizeof(unsigned long)); + } + + tss->bitmap = IO_BITMAP_OFFSET; + + } else { + /* In this case, we're switching FROM a task with IO port access, + * to a task that doesn't use the IO bitmap. We set any TSS bits + * that might have been cleared, ready for future use. */ + for ( i = 0; i < sizeof(prev_p->io_bitmap_sel) * 8; i++ ) + if ( !test_bit(i, &prev_p->io_bitmap_sel) ) + memset(&tss->io_bitmap[i * IOBMP_SELBIT_LWORDS], + 0xFF, IOBMP_SELBIT_LWORDS * sizeof(unsigned long)); + + /* + * a bitmap offset pointing outside of the TSS limit + * causes a nicely controllable SIGSEGV if a process + * tries to use a port IO instruction. The first + * sys_ioperm() call sets up the bitmap properly. + */ + tss->bitmap = INVALID_IO_BITMAP_OFFSET; + } + } + + /* Switch page tables. */ write_ptbase(&next_p->mm); tlb_clocktick(); diff --git a/xen/arch/i386/traps.c b/xen/arch/i386/traps.c index d50b101f3a..f362faa05f 100644 --- a/xen/arch/i386/traps.c +++ b/xen/arch/i386/traps.c @@ -657,7 +657,7 @@ __asm__ __volatile__ ("movw %w3,0(%2)\n\t" \ void set_tss_desc(unsigned int n, void *addr) { - _set_tssldt_desc(gdt_table+__TSS(n), (int)addr, 235, 0x89); + _set_tssldt_desc(gdt_table+__TSS(n), (int)addr, 8299, 0x89); } void __init trap_init(void) diff --git a/xen/common/physdev.c b/xen/common/physdev.c index b57c6b564b..6375fd48f0 100644 --- a/xen/common/physdev.c +++ b/xen/common/physdev.c @@ -126,7 +126,7 @@ int physdev_pci_access_modify( { struct task_struct *p; struct pci_dev *pdev; - int rc = 0; + int i, j, rc = 0; if ( !IS_PRIV(current) ) BUG(); @@ -146,7 +146,7 @@ int physdev_pci_access_modify( return -ESRCH; /* Make the domain privileged. */ - set_bit(PF_PRIVILEGED, &p->flags); + set_bit(PF_PHYSDEV, &p->flags); /* Grant write access to the specified device. */ if ( (pdev = pci_find_slot(bus, PCI_DEVFN(dev, func))) == NULL ) @@ -164,6 +164,55 @@ int physdev_pci_access_modify( if ( pdev->hdr_type != PCI_HEADER_TYPE_NORMAL ) INFO("XXX can't give access to bridge devices yet\n"); + /* Now, setup access to the IO ports and memory regions for the device. */ + + if ( p->io_bitmap == NULL ) + { + p->io_bitmap = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); + if ( p->io_bitmap == NULL ) + { + rc = -ENOMEM; + goto out; + } + memset(p->io_bitmap, 0xFF, IO_BITMAP_BYTES); + + p->io_bitmap_sel = ~0ULL; + } + + for ( i = 0; i < DEVICE_COUNT_RESOURCE; i++ ) + { + struct resource *r = &pdev->resource[i]; + + if ( r->flags & IORESOURCE_IO ) + { + /* Give the domain access to the IO ports it needs. Currently, + * this will allow all processes in that domain access to those + * ports as well. This will do for now, since driver domains don't + * run untrusted processes! */ + INFO("Giving domain %llu IO resources (%lx - %lx) " + "for device %s\n", dom, r->start, r->end, pdev->slot_name); + for ( j = r->start; j < r->end + 1; j++ ) + { + clear_bit(j, p->io_bitmap); + /* Record that we cleared a bit using bit n of the selector: + * n = (j / (4 bytes in a word * 8 bits in a byte)) + * / number of words per selector bit + */ + clear_bit((j / (8 * 4)) / IOBMP_SELBIT_LWORDS, + &p->io_bitmap_sel); + } + } + else if ( r->flags & IORESOURCE_MEM ) + { + /* allow domain to map IO memory for this device */ + INFO("Giving domain %llu memory resources (%lx - %lx) " + "for device %s\n", dom, r->start, r->end, pdev->slot_name); + for ( j = r->start; j < r->end + 1; j += PAGE_SIZE ) + SHARE_PFN_WITH_DOMAIN(frame_table + (j >> PAGE_SHIFT), p); + } + } + + out: put_task_struct(p); return rc; @@ -180,8 +229,8 @@ inline static int check_dev_acc (struct task_struct *p, *pdev = NULL; - if ( !IS_PRIV(p) ) - return -EPERM; /* no pci acces permission */ + if ( !IS_CAPABLE_PHYSDEV(p) ) + return -EPERM; /* no pci access permission */ if ( bus > PCI_BUSMAX || dev > PCI_DEVMAX || func > PCI_FUNCMAX ) return -EINVAL; @@ -651,5 +700,7 @@ void physdev_init_dom0(struct task_struct *p) dev->slot_name); } } + + set_bit(PF_PHYSDEV, &p->flags); } diff --git a/xen/include/asm-i386/processor.h b/xen/include/asm-i386/processor.h index 26f64d1f9f..2968e2e4e9 100644 --- a/xen/include/asm-i386/processor.h +++ b/xen/include/asm-i386/processor.h @@ -287,9 +287,12 @@ extern unsigned int mca_pentium_flag; #define TASK_UNMAPPED_BASE (TASK_SIZE / 3) /* - * Size of io_bitmap in longwords: 32 is ports 0-0x3ff. + * Size of io_bitmap in longwords: + * For Xen we support the full 8kbyte IO bitmap but use the io_bitmap_sel field + * of the task_struct to avoid a full 8kbyte copy when switching to / from + * domains with bits cleared. */ -#define IO_BITMAP_SIZE 32 +#define IO_BITMAP_SIZE 2048 #define IO_BITMAP_BYTES (IO_BITMAP_SIZE * 4) #define IO_BITMAP_OFFSET offsetof(struct tss_struct,io_bitmap) #define INVALID_IO_BITMAP_OFFSET 0x8000 @@ -429,7 +432,7 @@ long set_fast_trap(struct task_struct *p, int idx); 0,0,0,0,0,0, /* ds,fs,gs */ \ 0,0, /* ldt */ \ 0, INVALID_IO_BITMAP_OFFSET, /* tace, bitmap */ \ - {~0, } /* ioperm */ \ + { [0 ... IO_BITMAP_SIZE] = ~0UL }, /* ioperm */ \ } struct mm_struct { diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h index 14f4019ac1..c022b4a407 100644 --- a/xen/include/xen/sched.h +++ b/xen/include/xen/sched.h @@ -44,11 +44,13 @@ extern struct mm_struct init_mm; #define PF_IDLETASK 4 /* Is this one of the per-CPU idle domains? */ #define PF_PRIVILEGED 5 /* Is this domain privileged? */ #define PF_CONSOLEWRITEBUG 6 /* Has this domain used the obsolete console? */ +#define PF_PHYSDEV 7 /* May this domain do IO to physical devices? */ #include #include #define IS_PRIV(_p) (test_bit(PF_PRIVILEGED, &(_p)->flags)) +#define IS_CAPABLE_PHYSDEV(_p) (test_bit(PF_PHYSDEV, &(_p)->flags)) struct task_struct; @@ -174,6 +176,14 @@ struct task_struct spinlock_t pcidev_lock; struct list_head pcidev_list; + /* The following IO bitmap stuff is x86-dependent. */ + u64 io_bitmap_sel; /* Selector to tell us which part of the IO bitmap are + * "interesting" (i.e. have clear bits) */ + + /* Handy macro - number of bytes of the IO bitmap, per selector bit. */ +#define IOBMP_SELBIT_LWORDS ( IO_BITMAP_SIZE / 64 ) + unsigned long *io_bitmap; /* Pointer to task's IO bitmap or NULL */ + unsigned long flags; atomic_t refcnt;